1 Summary of data reduction and run:

Rachel add in a description of how the run went: - This was my first gasbench run, broadly went poorly - I really mucked up my standards somehow. They were all wildly out of order. See the wild re-ordering block below

There were three samples that had data come out of it: S75, B105, O95 There were four samples had did not have usable data (large stdev): S83, S100, B50, O75

Precision on the scale corrections were +/ x.xxx permil (d13C) and +/- x.xxx permil (d18O). All analyses (samples = sa, standards = st) culled are listed below (see output file for details on culling):

Examples: - Analysis 22995, CU YULE, monitoriing standard, very low yield, large atm. peaks - Analysis 23009, IAEA-C2, drift standard, very low yield, large atm. peaks

  • xxxx - major outlier in d13C and d18O; exact reason not clear (st)
  • xxxx - yield problem; too small and so had high between peak stdevs (st)
  • xxxx - Sample XXXX; too small; high between peak stdev; 20 peaks so likely atmosphere leak (sa)
  • xxxx - yield problem; too large and outlier in d13C and d18O (st)
  • xxxx - yield problem; too small; high between peak stdev; 20 peaks so likely atmosphere leak (st)
  • xxxx - major outlier in d13C and d18O; exact reason not clear (st)
  • xxxx - smaller than linearity bracket (sa)
  • xxxx - major outlier in d13C and d18O; exact reason not clear (st)

2 Load libraries, data, and define standards

2.1 load necessary R libraries and data, define accepted standard values for correcting standards

#devtools::install_github("isoverse/isoprocessor") 

library(isoprocessor)
## Loading required package: isoreader
## 
## Attaching package: 'isoreader'
## The following object is masked from 'package:stats':
## 
##     filter
## 
## Attaching package: 'isoprocessor'
## The following objects are masked from 'package:isoreader':
## 
##     iso_calculate_ratios, iso_convert_signals, iso_convert_time,
##     iso_plot_continuous_flow_data, iso_plot_dual_inlet_data,
##     iso_plot_raw_data
## The following object is masked from 'package:stats':
## 
##     filter
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks isoprocessor::filter(), isoreader::filter(), stats::filter()
## x dplyr::lag()    masks stats::lag()
library(stringr)
library(openxlsx)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(isoreader)
library(isoprocessCUBES) #if loading this package for the first time, be sure to install the "devtools" package first, then install the isoprocessCUBES package: devtools::install_github("CUBESSIL/isoprocessCUBES")

2.2 load data

rawfiles.directory<- file.path(".")  #enter the name of your raw data folder here; should be in the same directory as your .Rproj file

session<-"REH_modcarbs"  #update this to name your own session
Run <- "1"

dxf_files<-iso_read_continuous_flow(rawfiles.directory,read_vendor_data_table = TRUE,quiet = T) #reads in raw data files and extracts necessary data
dxf_files <- iso_omit_files_with_problems(dxf_files)
## Warning: iso_filter_files_with_problems() was renamed and will be
## removed in a future version of the isoreader package. Please use
## iso_filter_files_with_problems() directly instead to make your code future-
## proof.
## Info: removing 0/45 files that have any error (keeping 45)
dxf_data <- iso_get_vendor_data_table(dxf_files, include_file_info = everything()) #converts necessary data into usable data frame
## Info: aggregating vendor data table from 45 data file(s), including file info 'everything()'
raw.data <- dxf_data %>% 
  # exclude flush gas; change to check gas if that is the term used
  filter(`Identifier 2` !="Flush Gas") %>%
  #changes column names to match input file headers; adjust as needed if someone puts data in different column than normal in the sequence file
  rename(row = Row,      
         mass = Comment, 
         Identifier1 = `Identifier 1`, 
         type = `Identifier 2`,
         d13Craw = `d 13C/12C`,
         d18Oraw = `d 18O/16O`,
         Area44 = `Intensity 44`) %>%    
  #changes mass data type to numeric data instead of character
  mutate(mass = as.numeric(mass), row = as.numeric(row))  %>%
  #if needed, make specific changes in individual cells; comment out if not needed; this example removes a space of some of the entries for "sample" so they are all the same
  mutate(#type = str_replace(type, "sampled", "mon.std"),
         `Identifier1` = ifelse(`Identifier1`== "YULE", "CU YULE", `Identifier1`),
          Identifier1 = ifelse(Analysis== 20302, "CU YULE", Identifier1),
          Identifier1 = ifelse(Analysis== 20303, "IAEA-603", Identifier1),
          Identifier1 = ifelse(Analysis== 20304, "Merck", Identifier1),
         
          Identifier1 = ifelse(Analysis == 22992, "IAEA-C2", Identifier1),
          type = ifelse(Analysis== 22992, "drift.std", type),
          Identifier1 = ifelse(Analysis == 22993, "USGS44", Identifier1),
          type = ifelse(Analysis== 22993, "dis2.std", type),
          Identifier1 = ifelse(Analysis == 22994, "NBS18", Identifier1),
          type = ifelse(Analysis== 22994, "dis.std", type),
          Identifier1 = ifelse(Analysis == 22995, "CU YULE", Identifier1),
          type = ifelse(Analysis== 22995, "mon.std", type),
          
          Identifier1 = ifelse(Analysis == 23006, "NBS18", Identifier1),
          type = ifelse(Analysis== 23006, "dis.std", type),
          Identifier1 = ifelse(Analysis == 23007, "CU YULE", Identifier1),
          type = ifelse(Analysis== 23007, "mon.std", type),
          Identifier1 = ifelse(Analysis == 23008, "USGS44", Identifier1),
          type = ifelse(Analysis== 23008, "dis2.std", type),
          Identifier1 = ifelse(Analysis == 23009, "IAEA-C2", Identifier1),
          type = ifelse(Analysis== 23009, "drift.std", type),
         
          Identifier1 = ifelse(Analysis == 23018, "IAEA-C2", Identifier1),
          type = ifelse(Analysis== 23018, "drift.std", type),
          Identifier1 = ifelse(Analysis == 23019, "USGS44", Identifier1),
          type = ifelse(Analysis== 23019, "dis2.std", type),
          Identifier1 = ifelse(Analysis == 23020, "NBS18", Identifier1),
          type = ifelse(Analysis== 23020, "dis.std", type),
          Identifier1 = ifelse(Analysis == 23021, "CU YULE", Identifier1),
          type = ifelse(Analysis== 23021, "mon.std", type),
         
         Identifier1 = ifelse(Analysis == 23022, "O95_whole", Identifier1),
          type = ifelse(Analysis== 23022, "sample", type),
         
         ) 
  
#another way to change miss labeled or typos
#updating name and mass for 2 swapped standards (based on their isotope values and adjacent positions)
raw.data[  raw.data$Analysis == 9100, "Identifier1"] <- "HIS"
raw.data[  raw.data$Analysis == 9100, "mass"] <- 99
raw.data[  raw.data$Analysis == 9101, "Identifier1"] <- "NBS18"
raw.data[  raw.data$Analysis == 9101, "mass"] <- 96

2.3 define standards and standard values

#input all standard values in PDB mineral values; be sure to adjust for YOUR standards
Standards <- readRDS(url("https://github.com/cubessil/clumpsDRcodes/blob/master/Standards.RDS?raw=true"))

 corr.std<-"IAEA-C2"  #enter your standard names here
 dis2.std<-"USGS44"
 dis.std<-"NBS18"
 mon.std<-"CU YULE"

 Standardsforthisrun <- Standards %>% select(Sample.ID, d13C,d18O) %>% filter(Sample.ID %in% c(corr.std, mon.std, dis.std, dis2.std)) %>%  
   rename("std.name"= Sample.ID,
          "C.acc" = d13C,
          "O.acc" = d18O)

 C.stds.table <- Standardsforthisrun %>% select(std.name, C.acc)
 O.stds.table <- Standardsforthisrun %>% select(std.name, O.acc)

#if you run just the code above it matches the tables as below. However at some point in the code it looks for C.acc and O.acc as a variable not from a table.
#if I add the below code it fixes it but as you can guess this is not ideal  
C.acc <-  C.stds.table %>% filter(std.name == corr.std)
C.acc <- C.acc$C.acc 
O.acc <-  O.stds.table %>% filter(std.name == corr.std)
O.acc <- O.acc$O.acc 

2.4 average peaks and identify samples with too few peaks for culling

data <- 
  raw.data %>% 
  filter(Nr. > 5) %>%
  group_by(row, file_id, Analysis, Identifier1, mass, type) %>% 
  summarize(
    num.peaks=n(),
    d13C.raw=mean(as.numeric(d13Craw)),
    d13C.sd=sd(d13Craw),
    d18O.raw.SMOW=mean(as.numeric(d18Oraw)),
    d18O.sd=sd(d18Oraw),
    area44=mean(as.numeric(Area44)),
    area44.sd=sd(Area44),
    inv.area44=1/area44
  ) %>% mutate(
    Do_not_use ="", 
    Why_culled =""
  )
## `summarise()` has grouped output by 'row', 'file_id', 'Analysis', 'Identifier1',
## 'mass'. You can override using the `.groups` argument.
culled.data<- raw.data %>% 
  group_by(row, file_id, Analysis, Identifier1, mass, type) %>% 
  summarize(
    num.peaks=n()) %>%
  filter (num.peaks<6)
## `summarise()` has grouped output by 'row', 'file_id', 'Analysis', 'Identifier1',
## 'mass'. You can override using the `.groups` argument.
culled.data <-  bind_rows(culled.data, filter(data, num.peaks==1))

data$d18O.raw<-(data$d18O.raw.SMOW-41.43)/1.04143 #d18O output is CO2 SMOW, so need to use the CO2 SMOW to min PDB conversion

stdev<-gather(filter(data, num.peaks>1, d18O.sd<100), key = "isotope",
              value= "stdev", d13C.sd, d18O.sd)

3 Initial dataset check plots and culling

3.1 First round of culling:

Identify outliers: Look at standard deviations of the stable isotope values of the individual peaks (typically there are 10 peaks). Often the “cutoff” of outliers tends to be sd of 0.075 - 0.1 permil, and coincides with small peak areas

sd.values<-ggplot(stdev, aes(x=row, y=stdev, fill=Identifier1)) +
  geom_point(size=3, shape=21) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  guides(fill=FALSE) +
  facet_grid(. ~ isotope)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
sd.hist<-ggplot(stdev, aes(x=stdev, fill=isotope)) +
  geom_histogram(binwidth=.01) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  guides(fill=FALSE) +
  facet_grid(. ~ isotope)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
sd.box<-ggplot(stdev, aes(x=isotope, y=stdev, fill=isotope)) +
  geom_boxplot()

sd.v.area44<-ggplot(stdev, aes(x=area44, y=stdev, fill=factor(num.peaks))) +
  geom_point(size=3, shape=21) +
  scale_fill_discrete() +
  facet_grid(. ~ isotope)

multiplot(sd.values, sd.hist, sd.box, sd.v.area44, cols=1)
## Loading required package: grid

- stdev vs. area44 plots show a link between how much gas there was and how well it ran. - if its standards that didn’t run well in addition to samples - then need to chase down another problem - oxygen lately isn’t running as well - stop and do maintenance on gasbench? - histogram looking for normal distribution around a ‘normal’ stdev

– go back and look at vials right after the run (next day) -> can help pinpoint yield problems.

Remove major outliers based on large standard deviation of peaks, then redo plots - also, adds culled data to “culled data” tab that shows samples and standards that shouldn’t be used

#adjust line 121: change the value of d18O.sd.cutoff to match the cutoff of outliers based on the previous plots; default is 0.1
d18O.sd.cutoff <- 0.3

culled.data <- bind_rows(culled.data, subset(data, d18O.sd>d18O.sd.cutoff))
wo.culled <- subset(data, d18O.sd<d18O.sd.cutoff)

stds1<- subset(data, type!="sample" & d18O.sd<d18O.sd.cutoff)

stdev<-gather(wo.culled, key = "isotope",
              value= "stdev", d13C.sd, d18O.sd)

sd.values<-ggplot(stdev, aes(x=row, y=stdev, fill=Identifier1)) +
  geom_point(size=3, shape=21) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  guides(fill=FALSE) +
  facet_grid(. ~ isotope)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
sd.hist<-ggplot(stdev, aes(x=stdev, fill=isotope)) +
  geom_histogram(binwidth=.005) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  guides(fill=FALSE) +
  facet_grid(. ~ isotope)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
sd.box<-ggplot(stdev, aes(x=isotope, y=stdev, fill=isotope)) +
  geom_boxplot()

sd.v.area44<-ggplot(stdev, aes(x=area44, y=stdev, fill=factor(num.peaks))) +
  geom_point(size=3, shape=21) +
  scale_fill_discrete() +
  facet_grid(. ~ isotope)
  
multiplot(sd.values, sd.hist, sd.v.area44, sd.box, cols=1)

3.2 Calculate and plot yields

Plot yields of all samples and standards, as well as just the standards, using INTERACTIVE plots. Look for major outliers that coincide with yield problems. Note that the linear model in the yield.stds figure here is based on ALL the standard data

Check to see that samples all fall within linearity range, and for any other obvious outliers: * do linearity standards cover the area space of your samples and other standards? * do all the standards show typical trends between area and mass? Do you see very general trends like that in your samples (sample sets with wide ranges in weight percent carbonate will not show a strong relationship)?

# adjust next line only: change #'s in stds.to.cull to reflect the row #'s that need to be culled, add row#'s as needed and rerun after looking at the new plots
stds.to.cull <- c(22995, 23009) # #Analysis #s with yield problem and/or significant outlier in d13C or d18O, including samples smaller than linearity range that weren't caught by other culling steps
                    
stds.culled <- filter(stds1, Analysis %in% stds.to.cull)
stds <- filter(stds1, !Analysis %in% stds.to.cull) 
culled.data<-bind_rows(culled.data, stds.culled)

stds<- data.frame(stds, cbind(predict.lm(lm(stds$area44 ~ stds$mass), interval=c("confidence"), level=0.95)))

yield.all<-ggplot(data, aes(x=mass, y=area44, fill=type, label=Analysis)) +
  geom_point(size=3, shape=22) +
  theme_bw() +
  labs(title= "all data")

yield.stds<-ggplot(stds, aes(x=mass, y=area44, label=Analysis)) +
  stat_smooth(method="lm") +   
  geom_point(aes(fill=type, shape=type), size=2) +
  theme_bw() +
  scale_shape_manual(values=c(21,22,23,24,25)) +
  labs(title= "standards - yield")
  
calc_std_means_d13C <- function(df) calc_means(df, "d13C.raw")

d13C.stds <- 
  ggplot(stds, aes(label=Analysis)) +
  geom_hline(
    data = calc_std_means_d13C,
    mapping = aes(yintercept = yintercept, color = type, linetype = linetype)) +
  geom_point(shape=21, mapping = aes(x=area44, y=d13C.raw, fill=type)) +
  scale_linetype_manual(values = c(1, 3, 2, 3, 2)) + 
  facet_grid(type ~ ., scales = "free") +
  theme_bw() +
  labs(title= "d13C standards - means and uncertainties")

calc_std_means_d18O <- function(df) calc_means(df, "d18O.raw")

d18O.stds <- 
  ggplot(stds, aes(label=Analysis)) +
  geom_hline(
    data = calc_std_means_d18O,
    mapping = aes(yintercept = yintercept, color = type, linetype = linetype)) +
  geom_point(shape=21, mapping = aes(x=area44, y=d18O.raw, fill=type)) +
  scale_linetype_manual(values = c(1, 3, 2, 3, 2)) + 
  facet_grid(type ~ ., scales = "free") +
  theme(legend.position = "none") +
  theme_bw() +
  labs(title= "d18O standards - means and uncertainties")

ggplotly(yield.all) 
ggplotly(yield.stds)
## `geom_smooth()` using formula 'y ~ x'
ggplotly(d18O.stds)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
ggplotly(d13C.stds)

First plot – samples all look all small.

yield problems: 22995 23009

3.3 Check peaks

Makes plots of individual analysis files for analyses in the culled.data dataframe - use to check peak sizes, signs of atmospheric peaks, and other irregularities

dxf_files %>% 
  iso_filter_files(file_id %in% culled.data$file_id) %>% 
  iso_plot_raw_data(data = c(44), panel = "file", color = "data")
## Info: applying file filter, keeping 21 of 45 files
## Info: plotting data from 21 data file(s)

#If there are any additional chromatograms needed, just input the analysis numbers into the more.files list, replacing data$Analysis[5] with analysis number(s) - for example c(9536, 9655) instead of c(data$Analysis[5]). data$Analysis[5] is just acting as a stand-in to prevent errors if no additional analyses are selected

more.files <- c(data$Analysis[28])

plot.more <- filter(data, Analysis %in% more.files)

dxf_files %>% 
  iso_filter_files(file_id %in% plot.more$file_id) %>% 
  iso_plot_raw_data(data = c(44,45,46), panel = "file", color = "data")
## Info: applying file filter, keeping 1 of 45 files
## Info: plotting data from 1 data file(s)

This is the plot for 23009 - based on the large shoulder peaks, I suspect this vial has atm. in it

#If there are any additional chromatograms needed, just input the analysis numbers into the more.files list, replacing data$Analysis[5] with analysis number(s) - for example c(9536, 9655) instead of c(data$Analysis[5]). data$Analysis[5] is just acting as a stand-in to prevent errors if no additional analyses are selected

more.files <- c(data$Analysis[14])

plot.more <- filter(data, Analysis %in% more.files)

dxf_files %>% 
  iso_filter_files(file_id %in% plot.more$file_id) %>% 
  iso_plot_raw_data(data = c(44,45,46), panel = "file", color = "data")
## Info: applying file filter, keeping 1 of 45 files
## Info: plotting data from 1 data file(s)

This is for 22995. Such tiny peaks. such big shoulders. user error!

3.4 Calculate weight percent carbonate

yield.line <- lm(stds$mass ~ stds$area44)

(yield.slope <- coef(yield.line)[[2]])
## [1] 5.32836
(yield.intercept <- coef(yield.line)[[1]])
## [1] 16.88206
data$PercentCO3 <- ((yield.slope * data$area44 + yield.intercept)/data$mass *100)

data$target.wgt.ug <-  110/(data$PercentCO3/100)

3.5 Calculate raw data means

raw.corr <- filter(stds, Identifier1==corr.std)
raw.mon <- filter(stds, type=="mon.std")
raw.dis <- filter(stds, type=="dis.std")
raw.dis2 <- filter(stds, type=="dis2.std")

C.stds.table$rawC.mean <- c(mean(raw.corr$d13C.raw), mean(raw.mon$d13C.raw), mean(raw.dis$d13C.raw), mean(raw.dis2$d13C.raw))
C.stds.table$rawC.sd <- c(sd(raw.corr$d13C.raw), sd(raw.mon$d13C.raw), sd(raw.dis$d13C.raw), sd(raw.dis2$d13C.raw))

O.stds.table$rawO.mean <- c(mean(raw.corr$d18O.raw), mean(raw.mon$d18O.raw), mean(raw.dis$d18O.raw), mean(raw.dis2$d18O.raw))
O.stds.table$rawO.sd <- c(sd(raw.corr$d18O.raw), sd(raw.mon$d18O.raw), sd(raw.dis$d18O.raw), sd(raw.dis$d18O.raw))

This section is building the first set of mean values of standards and adds them to a table with accepted values.

4 Carbon corrections

4.1 C Offset correction

Evaluate average offset and standard deviation of combined linearity and drift standards. Does NOT include linearity or drift correction to data

offsetC<-subset(stds, Identifier1==corr.std)

(offsetC.mean<-mean(offsetC$d13C.raw))
## [1] -8.566031
(offsetC.sd<-sd(offsetC$d13C.raw))
## [1] 0.2478169
offsetC$d13C.offset <- offsetC$d13C.raw +  (C.acc - offsetC.mean)

(offsetcorrC.mean<-mean(offsetC$d13C.offset))
## [1] -8.25
(offsetcorrC.sd<-sd(offsetC$d13C.offset))
## [1] 0.2478169
d13C.offset<-ggplot(offsetC, aes(x=area44, y=d13C.offset, shape=type)) +
  geom_point(fill="orange", size=3) +
  geom_hline(yintercept=offsetcorrC.mean, colour="orange") +
  geom_hline(yintercept=offsetcorrC.mean + offsetcorrC.sd, colour="orange", linetype="dashed") +
  geom_hline(yintercept=offsetcorrC.mean - offsetcorrC.sd, colour="orange", linetype="dashed") +
  geom_hline(yintercept=offsetcorrC.mean + 2*offsetcorrC.sd, colour="orange", linetype=3) +
  geom_hline(yintercept=offsetcorrC.mean - 2*offsetcorrC.sd, colour="orange", linetype=3) +
  scale_shape_manual(values=c(21,22,23,24,25)) +
  annotate("text", y = offsetcorrC.mean + 0.01, x = min(offsetC$area44), 
    label = paste0("mean: ", sprintf("%.2f", offsetcorrC.mean), " \U00B1 ", sprintf("%.2f", offsetcorrC.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE, colour="black") +
  theme_bw() 
## Warning in sprintf("%.2f", offsetcorrC.sd, 2): one argument not used by format
## '%.2f'
d13C.offset.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=offsetC, aes(x=mass, y=area44), shape=21, fill="orange", size = 2) +
  theme_bw()

multiplot(d13C.offset, d13C.offset.mass, cols=2)
## `geom_smooth()` using formula 'y ~ x'

basic offset corrections

Assess effects: Apply offset correction to the whole dataset of raw values, and check the monitoring standards

#apply offset correction to whole dataset
data$d13C.offset <- data$d13C.raw +  (C.acc - offsetC.mean)
stds$d13C.offset <- stds$d13C.raw +  (C.acc - offsetC.mean)

#make monitoring standard dataset and dataset for additional standards used later for discrimination correction
offsetC.mon <- subset(stds, Identifier1==mon.std)
offsetC.dis <- subset(stds, Identifier1==dis.std)
offsetC.dis2 <- subset(stds, Identifier1==dis2.std)


#check monitoring standard response
(offsetC.mon.mean<-mean(offsetC.mon$d13C.offset))
## [1] -3.44357
(offsetC.mon.sd<-sd(offsetC.mon$d13C.offset))
## [1] 0.1464611
C.stds.table$offsetC.mean <- c(offsetcorrC.mean, offsetC.mon.mean, mean(offsetC.dis$d13C.offset), mean(offsetC.dis2$d13C.offset))
C.stds.table$offsetC.sd <- c(offsetcorrC.sd, offsetC.mon.sd, sd(offsetC.dis$d13C.offset), sd(offsetC.dis2$d13C.offset))

C.mon.offset<-ggplot(offsetC.mon, aes(x=area44, y=d13C.offset)) +
  geom_point(shape=21, fill="orange") +
  geom_hline(yintercept=offsetC.mon.mean, colour="orange") +
  geom_hline(yintercept=offsetC.mon.mean + offsetC.mon.sd, colour="orange", linetype="dashed") +
  geom_hline(yintercept=offsetC.mon.mean - offsetC.mon.sd, colour="orange", linetype="dashed") +
  geom_hline(yintercept=offsetC.mon.mean + 2*offsetC.mon.sd, colour="orange", linetype=3) +
  geom_hline(yintercept=offsetC.mon.mean - 2*offsetC.mon.sd, colour="orange", linetype=3) +
  annotate("text", y = offsetC.mon.mean +0.01, x = min(offsetC.mon$area44), label = paste0("mean: ", sprintf("%.2f", offsetC.mon.mean), " \U00B1 ", sprintf("%.2f", offsetC.mon.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE) +
  theme_bw()
## Warning in sprintf("%.2f", offsetC.mon.sd, 2): one argument not used by format
## '%.2f'
C.mon.offset.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=offsetC.mon, aes(x=mass, y=area44), shape=21, fill="orange", size = 2) +
  theme_bw()

multiplot(C.mon.offset, C.mon.offset.mass, cols=2)
## `geom_smooth()` using formula 'y ~ x'

4.2 C Drift corrections

Using the raw values, assess drift in isotope values throughout the run

driftC<-subset(stds, type=="drift.std")

drift.slopeC<-(coef(lm(as.numeric(driftC$d13C.raw) ~ driftC$row))[[2]])
drift.interC<-(coef(lm(as.numeric(driftC$d13C.raw) ~ driftC$row))[[1]])

#drift check
driftC$d13C.drift<- driftC$d13C.raw + (C.acc - (drift.slopeC * driftC$row + drift.interC))

(driftC.mean<-mean(driftC$d13C.drift))
## [1] -8.25
(driftC.sd<-sd(driftC$d13C.drift))
## [1] 1.776357e-15
C.drift<-ggplot(driftC, aes(x=row, y=d13C.raw)) +
  geom_smooth(method=lm, colour="black") +
  annotate("text", x = min(driftC$row), y = max(driftC$d13C.raw + 0.01), label = lm_eqn(driftC$row, driftC$d13C.raw),  size = 4, hjust=0, vjust=0, parse=TRUE, colour="black") +
  geom_point(shape=21, fill="black", size=2) +
  geom_point(aes(x=row, y=d13C.drift), fill="red", shape=21, size=2) +
  geom_hline(aes(yintercept=C.acc), size=.5) +
  geom_hline(yintercept = driftC.mean + driftC.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = driftC.mean - driftC.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = driftC.mean + 2*driftC.sd, colour="red", linetype=3) +
  geom_hline(yintercept = driftC.mean - 2*driftC.sd, colour="red", linetype=3) +
  annotate("text", 
    y = driftC.mean +0.01, 
    x = min(driftC$row),
    label = paste0("mean: ", sprintf("%.2f", driftC.mean), " \U00B1 ", sprintf("%.2f", driftC.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE) +
    theme_bw()
## Warning in sprintf("%.2f", driftC.sd, 2): one argument not used by format '%.2f'
C.drift.mass<-ggplot(stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=driftC, aes(x=mass, y=area44), shape=21, fill="red", size=3) +
  theme_bw()

multiplot(C.drift, C.drift.mass, cols=2)
## `geom_smooth()` using formula 'y ~ x'
## Warning in qt((1 - level)/2, df): NaNs produced
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## `geom_smooth()` using formula 'y ~ x'

Is there visible drift? sloped line? no slope?

Assess effects: Apply drift correction to the whole dataset of raw values, and check the monitoring standards

data$d13C.drift <- data$d13C.raw +  (C.acc - (drift.slopeC * data$row + drift.interC))
stds$d13C.drift <- stds$d13C.raw +  (C.acc - (drift.slopeC * stds$row + drift.interC))

driftC.mon<-subset(stds, Identifier1==mon.std)
driftC.dis <- subset(stds, Identifier1==dis.std)
driftC.dis2 <- subset(stds, Identifier1==dis2.std)

(driftC.mon.mean<-mean(driftC.mon$d13C.drift))
## [1] -3.254645
(driftC.mon.sd<-sd(driftC.mon$d13C.drift))
## [1] 0.4593889
C.stds.table$driftC.mean <- c(driftC.mean, driftC.mon.mean, mean(driftC.dis$d13C.drift),mean(driftC.dis2$d13C.drift))
C.stds.table$driftC.sd <- c(driftC.sd, driftC.mon.sd, sd(driftC.dis$d13C.drift),sd(driftC.dis2$d13C.drift))

C.mon.drift<-ggplot(driftC.mon, aes(x=area44, y=d13C.drift)) +
  geom_point(shape=21, fill="red") +
  geom_hline(yintercept = driftC.mon.mean, colour="red") +
  geom_hline(yintercept = driftC.mon.mean + driftC.mon.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = driftC.mon.mean - driftC.mon.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = driftC.mon.mean + 2*driftC.mon.sd, colour="red", linetype=3) +
  geom_hline(yintercept = driftC.mon.mean - 2*driftC.mon.sd, colour="red", linetype=3) +
  annotate("text",
    y = driftC.mon.mean +0.01, 
    x = min(driftC.mon$area44),
    label = paste0("mean: ", sprintf("%.2f", driftC.mon.mean), " \U00B1 ", sprintf("%.2f", driftC.mon.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE, colour="red")
## Warning in sprintf("%.2f", driftC.mon.sd, 2): one argument not used by format
## '%.2f'
C.mon.drift.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=driftC.mon, aes(x=mass, y=area44), shape=21, fill="red", size = 2) +
  theme_bw()

multiplot(C.mon.drift, C.mon.drift.mass, cols=2)
## `geom_smooth()` using formula 'y ~ x'

4.3 C Linearity correction

Using the raw values, assess drift in isotope values throughout the run

linC<-subset(stds, type=="lin.std") 

lin.slopeC<-(coef(lm(linC$d13C.raw ~ linC$inv.area44))[[2]])
lin.interC<-(coef(lm(linC$d13C.raw ~ linC$inv.area44))[[1]])

#linearity check
linC$d13C.lin<-linC$d13C.raw + (C.acc - (lin.slopeC * linC$inv.area44 + lin.interC))

(linC.mean<-mean(linC$d13C.lin))
## [1] -8.25
(linC.sd<-sd(linC$d13C.lin))
## [1] 0.03104419
C.lin.area44<-ggplot(linC, aes(x=area44, y=d13C.raw)) +
  geom_point(shape=21, fill="blue") +
  geom_smooth()  

C.lincorr.invarea<-ggplot(linC, aes(x=inv.area44, y=d13C.raw)) +
  geom_smooth(method=lm) +
  annotate("text", x = min(linC$inv.area44), y = max(linC$d13C.raw + 0.01), label = lm_eqn(linC$inv.area44, linC$d13C.raw),  size = 4, hjust=0, vjust=0, parse=TRUE) +
  geom_point(shape=21, fill="black", size=2) +
  geom_point(aes(x=inv.area44, y=d13C.lin), fill="red", shape=22) +
  geom_hline(aes(yintercept=C.acc), size=.5) +
  geom_hline(yintercept = linC.mean + linC.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = linC.mean - linC.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = linC.mean + 2*linC.sd, colour="red", linetype=3) +
  geom_hline(yintercept = linC.mean - 2*linC.sd, colour="red", linetype=3) +
  annotate("text",
    y = linC.mean +0.01, 
    x = min(linC$inv.area44),
    label = paste0("mean: ", sprintf("%.2f", linC.mean), " \U00B1 ", sprintf("%.2f", linC.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE) +
    theme_bw()
## Warning in sprintf("%.2f", linC.sd, 2): one argument not used by format '%.2f'
C.lin.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=linC, aes(x=mass, y=area44), shape=21, fill="blue", size = 2) +
  theme_bw()

multiplot(C.lin.area44, C.lincorr.invarea, C.lin.mass, cols=3)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 8.3317
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 8.7163
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 277.82
## Warning in sqrt(sum.squares/one.delta): NaNs produced
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small. fewer
## data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 8.3317
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 8.7163
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 277.82
## Warning in stats::qt(level/2 + 0.5, pred$df): NaNs produced
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Assess effects: Apply linearity correction to the whole dataset of raw values, and check the monitoring standards

data$d13C.lin <- data$d13C.raw +  (C.acc - (lin.slopeC * data$inv.area44 + lin.interC))
stds$d13C.lin <- stds$d13C.raw +  (C.acc - (lin.slopeC * stds$inv.area44 + lin.interC))

linC.mon<-subset(stds, Identifier1==mon.std)
linC.dis<-subset(stds, Identifier1==dis.std)
linC.dis2<-subset(stds, Identifier1==dis2.std)

(linC.mon.mean <- mean(linC.mon$d13C.lin))
## [1] -3.41218
(linC.mon.sd<-sd(linC.mon$d13C.lin))
## [1] 0.1446992
C.stds.table$linC.mean <- c(linC.mean, linC.mon.mean, mean(linC.dis$d13C.lin),mean(linC.dis2$d13C.lin))
C.stds.table$linC.sd <- c(linC.sd, linC.mon.sd, sd(linC.dis$d13C.lin), sd(linC.dis$d13C.lin))

C.mon.lin<-ggplot(linC.mon, aes(x=area44, y=d13C.lin)) +
  geom_point(shape=21, fill="blue") +
  geom_hline(yintercept = linC.mon.mean, colour="blue") +
  geom_hline(yintercept = linC.mon.mean + linC.mon.sd, colour="blue", linetype="dashed") +
  geom_hline(yintercept = linC.mon.mean - linC.mon.sd, colour="blue", linetype="dashed") +
  geom_hline(yintercept = linC.mon.mean + 2*linC.mon.sd, colour="blue", linetype=3) +
  geom_hline(yintercept = linC.mon.mean - 2*linC.mon.sd, colour="blue", linetype=3) +
  annotate("text",
    y = linC.mon.mean +0.01, 
    x = min(linC.mon$area44),
    label = paste0("mean: ", sprintf("%.2f", linC.mon.mean), " \U00B1 ", sprintf("%.2f", linC.mon.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE, colour="blue")
## Warning in sprintf("%.2f", linC.mon.sd, 2): one argument not used by format
## '%.2f'
C.mon.lin.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=linC.mon, aes(x=mass, y=area44), shape=21, fill="blue", size = 2) +
  theme_bw()

multiplot(C.mon.lin, C.mon.lin.mass, cols=2)
## `geom_smooth()` using formula 'y ~ x'

4.4 C Combined linearity + drift correction

Because a run can be affected by both drift and linearity, here we assess the affects of apply both the linearity and drift corrections to the raw data. This uses the linearity corrected values, and looks at the drift in those values

lindriftC <- merge(driftC, data[c("row", "d13C.lin")], by.x="row", by.y="row", all.x=TRUE, all.y=FALSE, sort=FALSE)

lindrift.slopeC<-(coef(lm(lindriftC$d13C.lin ~ lindriftC$row))[[2]])
lindrift.interC<-(coef(lm(lindriftC$d13C.lin ~ lindriftC$row))[[1]])

#drift check
lindriftC$d13C.lindrift<- lindriftC$d13C.lin + (C.acc - (lindrift.slopeC * lindriftC$row + lindrift.interC))

(lindriftC.mean<-mean(lindriftC$d13C.drift))
## [1] -8.25
(lindriftC.sd<-sd(lindriftC$d13C.drift))
## [1] 1.776357e-15
C.lindrift<-ggplot(lindriftC, aes(x=row, y=d13C.lin)) +
  geom_smooth(method=lm, colour="black") +
  annotate("text", x = min(lindriftC$row), y = max(lindriftC$d13C.lin + 0.01), label = lm_eqn(lindriftC$row, lindriftC$d13C.lin),  size = 4, hjust=0, vjust=0, parse=TRUE, colour="black") +
  geom_point(shape=21, fill="black", size=2) +
  geom_point(aes(x=row, y=d13C.lindrift), fill="red", shape=22, size=2) +
  geom_hline(aes(yintercept=C.acc), size=.5) +
  geom_hline(yintercept = lindriftC.mean + lindriftC.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftC.mean - lindriftC.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftC.mean + 2*lindriftC.sd, colour="red", linetype=3) +
  geom_hline(yintercept = lindriftC.mean - 2*lindriftC.sd, colour="red", linetype=3) +
  annotate("text",
    y = lindriftC.mean +0.01, 
    x = min(lindriftC$row),
    label = paste0("mean: ", sprintf("%.2f", lindriftC.mean), " \U00B1 ", sprintf("%.2f", lindriftC.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE) +
    theme_bw()
## Warning in sprintf("%.2f", lindriftC.sd, 2): one argument not used by format
## '%.2f'
C.lindrift.mass<-ggplot(stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=lindriftC, aes(x=mass, y=area44), shape=21, fill="red", size=3) +
  theme_bw()

multiplot(C.lindrift, C.lindrift.mass, cols=2)
## `geom_smooth()` using formula 'y ~ x'
## Warning in qt((1 - level)/2, df): NaNs produced
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## `geom_smooth()` using formula 'y ~ x'

Assess effects: Apply drift correction to the whole dataset of linearity corrected values, and check the monitoring standards

data$d13C.lindrift <- data$d13C.lin +  (C.acc - (lindrift.slopeC * data$row + lindrift.interC))
stds$d13C.lindrift <- stds$d13C.lin +  (C.acc - (lindrift.slopeC * stds$row + lindrift.interC))

lindriftC.mon<-subset(stds, Identifier1==mon.std)
lindriftC.dis<-subset(stds, Identifier1==dis.std)
lindriftC.dis2<-subset(stds, Identifier1==dis2.std)

(lindriftC.mon.mean<-mean(lindriftC.mon$d13C.lindrift))
## [1] -3.254065
(lindriftC.mon.sd<-sd(lindriftC.mon$d13C.lindrift))
## [1] 0.4573653
C.stds.table$lindriftC.mean <- c(lindriftC.mean, lindriftC.mon.mean, mean(lindriftC.dis$d13C.lindrift), mean(lindriftC.dis2$d13C.lindrift))
C.stds.table$lindriftC.sd <- c(lindriftC.sd, lindriftC.mon.sd, sd(lindriftC.dis$d13C.lindrift),sd(lindriftC.dis$d13C.lindrift))

C.mon.drift<-ggplot(lindriftC.mon, aes(x=area44, y=d13C.lindrift)) +
  geom_point(shape=21, fill="red") +
  geom_hline(yintercept = lindriftC.mon.mean, colour="red") +
  geom_hline(yintercept = lindriftC.mon.mean + lindriftC.mon.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftC.mon.mean - lindriftC.mon.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftC.mon.mean + 2*lindriftC.mon.sd, colour="red", linetype=3) +
  geom_hline(yintercept = lindriftC.mon.mean - 2*lindriftC.mon.sd, colour="red", linetype=3) +
  annotate("text",
    y = lindriftC.mon.mean +0.01, 
    x = min(lindriftC.mon$area44),
    label = paste0("mean: ", sprintf("%.2f", lindriftC.mon.mean), " \U00B1 ", sprintf("%.2f", lindriftC.mon.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE, colour="red")
## Warning in sprintf("%.2f", lindriftC.mon.sd, 2): one argument not used by format
## '%.2f'
C.mon.drift.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=lindriftC.mon, aes(x=mass, y=area44), shape=21, fill="red", size = 2) +
  theme_bw()

multiplot(C.mon.drift, C.mon.drift.mass, cols=2)
## `geom_smooth()` using formula 'y ~ x'

4.5 C Scale correction

Pick the appropriate column of either raw or corrected data to use in the scale regression. If there is no observable effects of linearity or drift (or both), use the raw values. The default is to select the raw values. Then this correct is applied to the whole dataset, using the same choice of previously corrected (or raw) values as used in making the scale correction

# replace the character string here with the correction column you want to use
col_for_scale_C <- "raw"  # options to substitute in here are: "raw" or "offset" or "drift" or  "lin" or  "lindrift"

#make new table of measured scale correction values versus the accepted values
col_in_data <- paste0("d13C.", col_for_scale_C)
C.accepted <- C.stds.table %>%
  select(std.name, C.acc)
scale.stds <- stds %>%
  #filter(Identifier1 == "NBS18" |Identifier1 == "Merck.UTSA") %>% #| Identifier1 == "CU YULE"
  rename(std.name = Identifier1)
  
scale.corr <-  left_join(scale.stds, C.accepted, by = "std.name") %>%
  select(std.name, C.acc, col_in_data) %>%
  rename(d13C = col_in_data)
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(col_in_data)` instead of `col_in_data` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
# safety checks
if (!col_in_data %in% names(data)) stop("this column does not exist in data or stds table: ", col_in_data, call. = FALSE)

# regression
m <- lm(scale.corr$C.acc ~ scale.corr$d13C)
scale.slopeC<-(coef(m)[[2]])
scale.interC<-(coef(m)[[1]])
R2 <- summary(m)$r.squared
S <- summary(m)$sigma
d13C.error.S <- S

# apply correction
data <- data %>%
  mutate_(d13C.scale = lazyeval::interp(~scale.slopeC * var + scale.interC, var = as.name(col_in_data)))
## Warning: `mutate_()` was deprecated in dplyr 0.7.0.
## Please use `mutate()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
stds <- stds %>%
  mutate_(d13C.scale = lazyeval::interp(~scale.slopeC * var + scale.interC, var = as.name(col_in_data)))

C.scale.all <- 
  ggplot(scale.corr, aes(x=d13C, y=C.acc)) +
  geom_smooth(method="lm", color = "blue") +
  geom_point(data=filter(data, d18O.sd<d18O.sd.cutoff), aes_string(x=col_in_data, y="d13C.scale"), shape=23, fill="red", size = 2) +
  geom_point(shape=21, fill="blue", size = 2) +
  geom_text(
           x = max(scale.corr$d13C), 
           y = max(scale.corr$C.acc), 
           label = str_interp("C.acc = $[.2f]{slope} ${var} + $[.2f]{intercept} (R2: $[.4f]{R2}) (S: $[.3f]{S})", 
                              list(slope = scale.slopeC, intercept = scale.interC, var = col_for_scale_C, R2 = R2, S = S)),
           size = 4, hjust=1.1, vjust=0, colour="blue") +
  labs(x = col_for_scale_C) +
  theme_bw()

C.scale.all
## `geom_smooth()` using formula 'y ~ x'

S value should be better than 0.1 per mil. Generally a good analytical precision to quote.

Assess effects: Check the monitoring standards after applying the scale correction

scaleC.mon<-subset(stds, Identifier1==mon.std)
scaleC.dis<-subset(stds, Identifier1==dis.std)
scaleC.dis2<-subset(stds, Identifier1==dis2.std)
scaleC.corr<-subset(stds, Identifier1==corr.std)

(scaleC.mon.mean<-mean(scaleC.mon$d13C.scale))
## [1] -3.255508
(scaleC.mon.sd<-sd(scaleC.mon$d13C.scale))
## [1] 0.1487182
C.stds.table$scaleC.mean <- c(mean(scaleC.corr$d13C.scale), scaleC.mon.mean, mean(scaleC.dis$d13C.scale), mean(scaleC.dis2$d13C.scale))
C.stds.table$scaleC.sd <- c(sd(scaleC.corr$d13C.scale), scaleC.mon.sd, sd(scaleC.dis$d13C.scale), sd(scaleC.dis2$d13C.scale))

5 Oxygen corrections

5.1 O Offset correction

Evaluate average offset and standard deviation of combined linearity and drift standards. Does NOT include linearity or drift correction to data

offsetO<-subset(stds, Identifier1==corr.std)

(offsetO.mean<-mean(offsetO$d18O.raw))
## [1] -6.713913
(offsetO.sd<-sd(offsetO$d18O.raw))
## [1] 0.7492133
offsetO$d18O.offset <- offsetO$d18O.raw +  (O.acc - offsetO.mean)

(offsetcorrO.mean<-mean(offsetO$d18O.offset))
## [1] -9.65
(offsetcorrO.sd<-sd(offsetO$d18O.offset))
## [1] 0.7492133
d18O.offset<-ggplot(offsetO, aes(x=area44, y=d18O.offset, shape=type)) +
  geom_point(fill="orange", size=3) +
  geom_hline(yintercept=offsetcorrO.mean, colour="orange") +
  geom_hline(yintercept=offsetcorrO.mean + offsetcorrO.sd, colour="orange", linetype="dashed") +
  geom_hline(yintercept=offsetcorrO.mean - offsetcorrO.sd, colour="orange", linetype="dashed") +
  geom_hline(yintercept=offsetcorrO.mean + 2*offsetcorrO.sd, colour="orange", linetype=3) +
  geom_hline(yintercept=offsetcorrO.mean - 2*offsetcorrO.sd, colour="orange", linetype=3) +
  scale_shape_manual(values=c(21,22,23,24,25)) +
  annotate("text", y = offsetcorrO.mean + 0.01, x = min(offsetO$area44), 
    label = paste0("mean: ", sprintf("%.2f", offsetcorrO.mean), " \U00B1 ", sprintf("%.2f", offsetcorrO.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE, colour="black") +
  theme_bw() 
## Warning in sprintf("%.2f", offsetcorrO.sd, 2): one argument not used by format
## '%.2f'
d18O.offset.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=offsetO, aes(x=mass, y=area44), shape=21, fill="orange", size = 2) +
  theme_bw()

multiplot(d18O.offset, d18O.offset.mass, cols=2)
## `geom_smooth()` using formula 'y ~ x'

Assess effects: Apply offset correction to the whole dataset of linearity corrected values, and check the monitoring standards

#apply offset correction to whole dataset
data$d18O.offset <- data$d18O.raw +  (O.acc - offsetO.mean)
stds$d18O.offset <- stds$d18O.raw +  (O.acc - offsetO.mean)

#make monitoring standard dataset and dataset for additional standards used later for discrimination correction
offsetO.mon <- subset(stds, Identifier1==mon.std)
offsetO.dis <- subset(stds, Identifier1==dis.std)
offsetO.dis2 <- subset(stds, Identifier1==dis2.std)

#check monitoring standard response
(offsetO.mon.mean<-mean(offsetO.mon$d18O.offset))
## [1] -7.999884
(offsetO.mon.sd<-sd(offsetO.mon$d18O.offset))
## [1] 0.4342504
O.stds.table$offsetO.mean <- c(offsetcorrO.mean, offsetO.mon.mean, mean(offsetO.dis$d18O.offset), mean(offsetO.dis2$d18O.offset))
O.stds.table$offsetO.sd <- c(offsetcorrO.sd, offsetO.mon.sd, sd(offsetO.dis$d18O.offset), sd(offsetO.dis2$d18O.offset))

O.mon.offset<-ggplot(offsetO.mon, aes(x=area44, y=d18O.offset)) +
  geom_point(shape=21, fill="orange") +
  geom_hline(yintercept=offsetO.mon.mean, colour="orange") +
  geom_hline(yintercept=offsetO.mon.mean + offsetO.mon.sd, colour="orange", linetype="dashed") +
  geom_hline(yintercept=offsetO.mon.mean - offsetO.mon.sd, colour="orange", linetype="dashed") +
  geom_hline(yintercept=offsetO.mon.mean + 2*offsetO.mon.sd, colour="orange", linetype=3) +
  geom_hline(yintercept=offsetO.mon.mean - 2*offsetO.mon.sd, colour="orange", linetype=3) +
  annotate("text", y = offsetO.mon.mean +0.01, x = min(offsetO.mon$area44), label = paste0("mean: ", sprintf("%.2f", offsetO.mon.mean), " \U00B1 ", sprintf("%.2f", offsetO.mon.sd, 2), " \U2030 (1 sd)"), size = 4, hjust=0, vjust=0, parse=FALSE) +
  theme_bw()
## Warning in sprintf("%.2f", offsetO.mon.sd, 2): one argument not used by format
## '%.2f'
O.mon.offset.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=offsetO.mon, aes(x=mass, y=area44), shape=21, fill="orange", size = 2) +
  theme_bw()

multiplot(O.mon.offset, O.mon.offset.mass, cols=2)
## `geom_smooth()` using formula 'y ~ x'

5.2 O Drift corrections

Using the raw values, assess drift in isotope values throughout the run

driftO<-subset(stds, type=="drift.std")

drift.slopeO<-(coef(lm(driftO$d18O.raw ~ driftO$row))[[2]])
drift.interO<-(coef(lm(driftO$d18O.raw ~ driftO$row))[[1]])

#drift check
driftO$d18O.drift<- driftO$d18O.raw + (O.acc - (drift.slopeO * driftO$row + drift.interO))

(driftO.mean<-mean(driftO$d18O.drift))
## [1] -9.65
(driftO.sd<-sd(driftO$d18O.drift))
## [1] 1.776357e-15
O.drift<-ggplot(driftO, aes(x=row, y=d18O.raw)) +
  geom_smooth(method=lm, colour="black") +
  annotate("text", x = min(driftO$row), y = max(driftO$d18O.raw + 0.01), label = lm_eqn(driftO$row, driftO$d18O.raw),  size = 4, hjust=0, vjust=0, parse=TRUE, colour="black") +
  geom_point(shape=21, fill="black", size=2) +
  geom_point(aes(x=row, y=d18O.drift), fill="red", shape=22, size=2) +
  geom_hline(aes(yintercept=O.acc), size=.5) +
  geom_hline(yintercept = driftO.mean + driftO.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = driftO.mean - driftO.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = driftO.mean + 2*driftO.sd, colour="red", linetype=3) +
  geom_hline(yintercept = driftO.mean - 2*driftO.sd, colour="red", linetype=3) +
  annotate("text",
    y = driftO.mean +0.01, 
    x = min(driftO$row),
    label = paste0("mean: ", sprintf("%.2f", driftO.mean), " \U00B1 ", sprintf("%.2f", driftO.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE) +
    theme_bw()
## Warning in sprintf("%.2f", driftO.sd, 2): one argument not used by format '%.2f'
O.drift.mass<-ggplot(stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=driftO, aes(x=mass, y=area44), shape=21, fill="red", size=3) +
  theme_bw()

multiplot(O.drift, O.drift.mass, cols=2)
## `geom_smooth()` using formula 'y ~ x'
## Warning in qt((1 - level)/2, df): NaNs produced
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## `geom_smooth()` using formula 'y ~ x'

Assess effects: Apply drift correction to the whole dataset of linearity corrected values, and check the monitoring standards

data$d18O.drift <- data$d18O.raw +  (O.acc - (drift.slopeO * data$row + drift.interO))
stds$d18O.drift <- stds$d18O.raw +  (O.acc - (drift.slopeO * stds$row + drift.interO))

driftO.mon<-subset(stds, Identifier1==mon.std)
driftO.dis <- subset(stds, Identifier1==dis.std)
driftO.dis2 <- subset(stds, Identifier1==dis2.std)

(driftO.mon.mean<-mean(driftO.mon$d18O.drift))
## [1] -6.607108
(driftO.mon.sd<-sd(driftO.mon$d18O.drift))
## [1] 0.2367961
O.stds.table$driftO.mean <- c(driftO.mean, driftO.mon.mean, mean(driftO.dis$d18O.drift), mean(driftO.dis2$d18O.drift))
O.stds.table$driftO.sd <- c(driftO.sd, driftO.mon.sd, sd(driftO.dis$d18O.drift), sd(driftO.dis$d18O.drift)) 

O.mon.drift<-ggplot(driftO.mon, aes(x=area44, y=d18O.drift)) +
  geom_point(shape=21, fill="red") +
  geom_hline(yintercept = driftO.mon.mean, colour="red") +
  geom_hline(yintercept = driftO.mon.mean + driftO.mon.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = driftO.mon.mean - driftO.mon.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = driftO.mon.mean + 2*driftO.mon.sd, colour="red", linetype=3) +
  geom_hline(yintercept = driftO.mon.mean - 2*driftO.mon.sd, colour="red", linetype=3) +
  annotate("text",
    y = driftO.mon.mean +0.01, 
    x = min(driftO.mon$area44),
    label = paste0("mean: ", sprintf("%.2f", driftO.mon.mean), " \U00B1 ", sprintf("%.2f", driftO.mon.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE, colour="red")
## Warning in sprintf("%.2f", driftO.mon.sd, 2): one argument not used by format
## '%.2f'
O.mon.drift.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=driftO.mon, aes(x=mass, y=area44), shape=21, fill="red", size = 2) +
  theme_bw()

multiplot(O.mon.drift, O.mon.drift.mass, cols=2)
## `geom_smooth()` using formula 'y ~ x'

5.3 O Linearity correction

Using the raw values, assess drift in isotope values throughout the run

linO<-subset(stds, type=="lin.std")

lin.slopeO<-(coef(lm(linO$d18O.raw ~ linO$inv.area44))[[2]])
lin.interO<-(coef(lm(linO$d18O.raw ~ linO$inv.area44))[[1]])

#linearity check
linO$d18O.lin<-linO$d18O.raw + (O.acc - (lin.slopeO * linO$inv.area44 + lin.interO))

(linO.mean<-mean(linO$d18O.lin))
## [1] -9.65
(linO.sd<-sd(linO$d18O.lin))
## [1] 0.04894948
O.lin.area44<-ggplot(linO, aes(x=area44, y=d18O.raw)) +
  geom_point(shape=21, fill="blue") +
  geom_smooth() 

O.lincorr.invarea<-ggplot(linO, aes(x=inv.area44, y=d18O.raw)) +
  geom_smooth(method=lm) +
  annotate("text", x = min(linO$inv.area44), y = max(linO$d18O.raw + 0.01), label = lm_eqn(linO$inv.area44, linO$d18O.raw),  size = 4, hjust=0, vjust=0, parse=TRUE) +
  geom_point(shape=21, fill="black", size=2) +
  geom_point(aes(x=inv.area44, y=d18O.lin), fill="red", shape=22) +
  geom_hline(aes(yintercept=O.acc), size=.5) +
  geom_hline(yintercept = linO.mean + linO.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = linO.mean - linO.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = linO.mean + 2*linO.sd, colour="red", linetype=3) +
  geom_hline(yintercept = linO.mean - 2*linO.sd, colour="red", linetype=3) +
  annotate("text",
    y = linO.mean +0.01, 
    x = min(linO$inv.area44),
    label = paste0("mean: ", sprintf("%.2f", linO.mean), " \U00B1 ", sprintf("%.2f", linO.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE) +
    theme_bw()
## Warning in sprintf("%.2f", linO.sd, 2): one argument not used by format '%.2f'
O.lin.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=linO, aes(x=mass, y=area44), shape=21, fill="blue", size = 2) +
  theme_bw()

multiplot(O.lin.area44, O.lincorr.invarea, O.lin.mass, cols=3)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 8.3317
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 8.7163
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 277.82
## Warning in sqrt(sum.squares/one.delta): NaNs produced
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : span too small. fewer
## data values than degrees of freedom.
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
## 8.3317
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 8.7163
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
## number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other near
## singularities as well. 277.82
## Warning in stats::qt(level/2 + 0.5, pred$df): NaNs produced
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Assess effects: Apply linearity correction to the whole dataset of raw, and check the monitoring standards

data$d18O.lin <- data$d18O.raw +  (O.acc - (lin.slopeO * data$inv.area44 + lin.interO))
stds$d18O.lin <- stds$d18O.raw +  (O.acc - (lin.slopeO * stds$inv.area44 + lin.interO))

linO.mon<-subset(stds, Identifier1==mon.std)
linO.dis<-subset(stds, Identifier1==dis.std)
linO.dis2<-subset(stds, Identifier1==dis2.std)

(linO.mon.mean <- mean(linO.mon$d18O.lin))
## [1] -8.369532
(linO.mon.sd<-sd(linO.mon$d18O.lin))
## [1] 0.4488262
O.stds.table$linO.mean <- c(linO.mean, linO.mon.mean, mean(linO.dis$d18O.lin), mean(linO.dis2$d18O.lin))
O.stds.table$linO.sd <- c(linO.sd, linO.mon.sd, sd(linO.dis$d18O.lin), sd(linO.dis2$d18O.lin)) 

O.mon.lin<-ggplot(linO.mon, aes(x=area44, y=d18O.lin)) +
  geom_point(shape=21, fill="blue") +
  geom_hline(yintercept = linO.mon.mean, colour="blue") +
  geom_hline(yintercept = linO.mon.mean + linO.mon.sd, colour="blue", linetype="dashed") +
  geom_hline(yintercept = linO.mon.mean - linO.mon.sd, colour="blue", linetype="dashed") +
  geom_hline(yintercept = linO.mon.mean + 2*linO.mon.sd, colour="blue", linetype=3) +
  geom_hline(yintercept = linO.mon.mean - 2*linO.mon.sd, colour="blue", linetype=3) +
  annotate("text",
    y = linO.mon.mean +0.01, 
    x = min(linO.mon$area44),
    label = paste0("mean: ", sprintf("%.2f", linO.mon.mean), " \U00B1 ", sprintf("%.2f", linO.mon.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE, colour="blue")
## Warning in sprintf("%.2f", linO.mon.sd, 2): one argument not used by format
## '%.2f'
O.mon.lin.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=linO.mon, aes(x=mass, y=area44), shape=21, fill="blue", size = 2) +
  theme_bw()

multiplot(O.mon.lin, O.mon.lin.mass, cols=2)
## `geom_smooth()` using formula 'y ~ x'

5.4 O Combined linearity + drift correction

Because a run can be affected by both drift and linearity, here we assess the affects of apply both the linearity and drift corrections to the raw data. This uses the linearity corrected values, and looks at the drift in those values

lindriftO <- merge(driftO, data[c("row", "d18O.lin")], by.x="row", by.y="row", all.x=TRUE, all.y=FALSE, sort=FALSE)

lindrift.slopeO<-(coef(lm(lindriftO$d18O.lin ~ lindriftO$row))[[2]])
lindrift.interO<-(coef(lm(lindriftO$d18O.lin ~ lindriftO$row))[[1]])

#drift check
lindriftO$d18O.lindrift<- lindriftO$d18O.lin + (O.acc - (lindrift.slopeO * lindriftO$row + lindrift.interO))

(lindriftO.mean<-mean(lindriftO$d18O.drift))
## [1] -9.65
(lindriftO.sd<-sd(lindriftO$d18O.drift))
## [1] 1.776357e-15
O.lindrift<-ggplot(lindriftO, aes(x=row, y=d18O.lin)) +
  geom_smooth(method=lm, colour="black") +
  annotate("text", x = min(lindriftO$row), y = max(lindriftO$d18O.lin + 0.01), label = lm_eqn(lindriftO$row, lindriftO$d18O.lin),  size = 4, hjust=0, vjust=0, parse=TRUE, colour="black") +
  geom_point(shape=21, fill="black", size=2) +
  geom_point(aes(x=row, y=d18O.lindrift), fill="red", shape=22, size=2) +
  geom_hline(aes(yintercept=O.acc), size=.5) +
  geom_hline(yintercept = lindriftO.mean + lindriftO.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftO.mean - lindriftO.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftO.mean + 2*lindriftO.sd, colour="red", linetype=3) +
  geom_hline(yintercept = lindriftO.mean - 2*lindriftO.sd, colour="red", linetype=3) +
  annotate("text",
    y = lindriftO.mean +0.01, 
    x = min(lindriftO$row),
    label = paste0("mean: ", sprintf("%.2f", lindriftO.mean), " \U00B1 ", sprintf("%.2f", lindriftO.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE) +
    theme_bw()
## Warning in sprintf("%.2f", lindriftO.sd, 2): one argument not used by format
## '%.2f'
O.lindrift.mass<-ggplot(stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=lindriftO, aes(x=mass, y=area44), shape=21, fill="red", size=3) +
  theme_bw()

multiplot(O.lindrift, O.lindrift.mass, cols=2)
## `geom_smooth()` using formula 'y ~ x'
## Warning in qt((1 - level)/2, df): NaNs produced
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## `geom_smooth()` using formula 'y ~ x'

Assess effects: Apply drift correction to the whole dataset of linearity corrected values, and check the monitoring standards

data$d18O.lindrift <- data$d18O.lin +  (O.acc - (lindrift.slopeO * data$row + lindrift.interO))
stds$d18O.lindrift <- stds$d18O.lin +  (O.acc - (lindrift.slopeO * stds$row + lindrift.interO))

lindriftO.mon<-subset(stds, Identifier1==mon.std) 
lindriftO.dis<-subset(stds, Identifier1==dis.std)
lindriftO.dis2<-subset(stds, Identifier1==dis2.std)

(lindriftO.mon.mean<-mean(lindriftO.mon$d18O.lindrift))
## [1] -6.60231
(lindriftO.mon.sd<-sd(lindriftO.mon$d18O.lindrift))
## [1] 0.2200558
O.stds.table$lindriftO.mean <- cbind(c(lindriftO.mean, lindriftO.mon.mean, mean(lindriftO.dis$d18O.lindrift),  mean(lindriftO.dis2$d18O.lindrift)))
O.stds.table$lindriftO.sd <- cbind(c(lindriftO.sd, lindriftO.mon.sd, sd(lindriftO.dis$d18O.lindrift), sd(lindriftO.dis2$d18O.lindrift))) 

O.mon.drift<-ggplot(lindriftO.mon, aes(x=area44, y=d18O.lindrift)) +
  geom_point(shape=21, fill="red") +
  geom_hline(yintercept = lindriftO.mon.mean, colour="red") +
  geom_hline(yintercept = lindriftO.mon.mean + lindriftO.mon.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftO.mon.mean - lindriftO.mon.sd, colour="red", linetype="dashed") +
  geom_hline(yintercept = lindriftO.mon.mean + 2*lindriftO.mon.sd, colour="red", linetype=3) +
  geom_hline(yintercept = lindriftO.mon.mean - 2*lindriftO.mon.sd, colour="red", linetype=3) +
  annotate("text",
    y = lindriftO.mon.mean +0.01, 
    x = min(lindriftO.mon$area44),
    label = paste0("mean: ", sprintf("%.2f", lindriftO.mon.mean), " \U00B1 ", sprintf("%.2f", lindriftO.mon.sd, 2), " \U2030 (1 sd)"),
    size = 4, hjust=0, vjust=0, parse=FALSE, colour="red")
## Warning in sprintf("%.2f", lindriftO.mon.sd, 2): one argument not used by format
## '%.2f'
O.mon.drift.mass<-ggplot (stds, aes(x=mass, y=area44)) +
  stat_smooth(method="lm") +
  geom_point(data=lindriftO.mon, aes(x=mass, y=area44), shape=21, fill="red", size = 2) +
  theme_bw()

multiplot(O.mon.drift, O.mon.drift.mass, cols=2)
## `geom_smooth()` using formula 'y ~ x'

5.5 O Scale correction

Pick the appropriate column of either raw or corrected data to use in the scale regression. If there is no observable effects of linearity or drift (or both), use the raw values. The default is to select the raw values. Then this correct is applied to the whole dataset, using the same choice of previously corrected (or raw) values as used in making the scale correction

# replace the character string here with the correction column you want to use
col_for_scale_O <- "lindrift"  # options to substitute in here are: "raw" or "offset" or "drift" or  "lin" or  "lindrift"

#make new table of measured scale correction values versus the accepted values
col_in_data <- paste0("d18O.", col_for_scale_O)
O.accepted <- O.stds.table %>%
  select(std.name, O.acc)
scale.stds <- stds %>%
  #filter(Identifier1 == "NBS18" | Identifier1 == "YULE") %>%
  rename(std.name = Identifier1)
  
scale.corr <-  left_join(scale.stds, O.accepted, by = "std.name") %>%
  select(std.name, O.acc, col_in_data) %>%
  rename(d18O = col_in_data)

# safety checks
if (!col_in_data %in% names(data)) stop("this column does not exist in data or stds table: ", col_in_data, call. = FALSE)

# regression
m <- lm(scale.corr$O.acc ~ scale.corr$d18O)
scale.slopeO<-(coef(m)[[2]])
scale.interO<-(coef(m)[[1]])
R2 <- summary(m)$r.squared
S <- summary(m)$sigma
d18O.error.S <- S

# apply correction
data <- data %>%
  mutate_(d18O.scale = lazyeval::interp(~scale.slopeO * var + scale.interO, var = as.name(col_in_data)))
stds <- stds %>%
  mutate_(d18O.scale = lazyeval::interp(~scale.slopeO * var + scale.interO, var = as.name(col_in_data)))

O.scale.all <- 
  ggplot(scale.corr, aes(x=d18O, y=O.acc)) +
  geom_smooth(method="lm", color = "blue") +
  geom_point(data=filter(data, d18O.sd<d18O.sd.cutoff), aes_string(x=col_in_data, y="d18O.scale"), shape=23, fill="red", size = 2) +
  geom_point(shape=21, fill="blue", size = 2) +
  geom_text(
           x = max(scale.corr$d18O), 
           y = max(scale.corr$O.acc), 
           label = str_interp("O.acc = $[.2f]{slope} ${var} + $[.2f]{intercept} (R2: $[.4f]{R2}) (S: $[.3f]{S})", 
                              list(slope = scale.slopeO, intercept = scale.interO, var = col_for_scale_O, R2 = R2, S = S)),
           size = 4, hjust=1.1, vjust=0, colour="blue") +
  labs(x = col_for_scale_O) +
  theme_bw()

O.scale.all
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 4 rows containing non-finite values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (geom_text).

Assess effects: Check the monitoring standards after applying the scale correction

scaleO.mon<-subset(stds, Identifier1==mon.std)
scaleO.dis<-subset(stds, Identifier1==dis.std)
scaleO.dis2<-subset(stds, Identifier1==dis2.std)
scaleO.corr<-subset(stds, Identifier1==corr.std)

(scaleO.mon.mean<-mean(scaleO.mon$d18O.scale))
## [1] -6.63322
(scaleO.mon.sd<-sd(scaleO.mon$d18O.scale))
## [1] 0.2122222
O.stds.table$scaleO.mean <- c(mean(scaleO.corr$d18O.scale), scaleO.mon.mean, mean(scaleO.dis$d18O.scale), mean(scaleO.dis2$d18O.scale))
O.stds.table$scaleO.sd <- c(sd(scaleO.corr$d18O.scale), scaleO.mon.sd, sd(scaleO.dis$d18O.scale), sd(scaleO.dis2$d18O.scale))

6 dataset finalization

6.1 label analyses that should not be used, with explanations for culling, label with values were used in the scale correction

#label as do not use
#label as do not use
data[data$Analysis == 22982, "Do_not_use"] <- "yes"  
data[data$Analysis == 22984, "Do_not_use"] <- "yes"  
data[data$Analysis == 22986, "Do_not_use"] <- "yes"  
data[data$Analysis == 22995, "Do_not_use"] <- "yes"   
data[data$Analysis == 22996, "Do_not_use"] <- "yes"  
data[data$Analysis == 22997, "Do_not_use"] <- "yes"
data[data$Analysis == 22998, "Do_not_use"] <- "yes"  
data[data$Analysis == 22999, "Do_not_use"] <- "yes" 
data[data$Analysis == 23002, "Do_not_use"] <- "yes" 
data[data$Analysis == 23003, "Do_not_use"] <- "yes" 
data[data$Analysis == 23004, "Do_not_use"] <- "yes" 
data[data$Analysis == 23009, "Do_not_use"] <- "yes"
data[data$Analysis == 23011, "Do_not_use"] <- "yes" 
data[data$Analysis == 23012, "Do_not_use"] <- "yes"
data[data$Analysis == 23014, "Do_not_use"] <- "yes" 
data[data$Analysis == 23015, "Do_not_use"] <- "yes" 
data[data$Analysis == 23016, "Do_not_use"] <- "yes"
data[data$Analysis == 23023, "Do_not_use"] <- "yes"
data[data$Analysis == 23024, "Do_not_use"] <- "yes"
data[data$Analysis == 23025, "Do_not_use"] <- "yes"
data[data$Analysis == 23026, "Do_not_use"] <- "yes"



# label those samples with reason they were culled, in main dataset
data[data$Analysis == 22982, "Why_culled"] <- "Atm. in the vial, high stdev"     
data[data$Analysis == 22984, "Why_culled"] <- "Atm. in the vial. high stdev"
data[data$Analysis == 22986, "Why_culled"] <- "too small"  
data[data$Analysis == 22995, "Why_culled"] <- "Atm. in the vial"   
data[data$Analysis == 22996, "Why_culled"] <- "too small, high stdev"
data[data$Analysis == 22997, "Why_culled"] <- "too small, high stdev"
data[data$Analysis == 22998, "Why_culled"] <- "too small, high stdev"
data[data$Analysis == 22999, "Why_culled"] <- "too small, high stdev"
data[data$Analysis == 23002, "Why_culled"] <- "too small, high stdev"
data[data$Analysis == 23003, "Why_culled"] <- "too small, high stdev"
data[data$Analysis == 23004, "Why_culled"] <- "too small, high stdev"
data[data$Analysis == 23009, "Why_culled"] <- "Atm. in the vial"
data[data$Analysis == 23011, "Why_culled"] <- "too small, high stdev"
data[data$Analysis == 23012, "Why_culled"] <- "too small, high stdev"
data[data$Analysis == 23014, "Why_culled"] <- "too small, high stdev"
data[data$Analysis == 23015, "Why_culled"] <- "too small, high stdev"
data[data$Analysis == 23016, "Why_culled"] <- "too small, high stdev"
data[data$Analysis == 23023, "Why_culled"] <- "too small, high stdev"
data[data$Analysis == 23024, "Why_culled"] <- "too small, high stdev"
data[data$Analysis == 23025, "Why_culled"] <- "Atm. in the vial"     
data[data$Analysis == 23026, "Why_culled"] <- "Atm. in the vial"

# label those samples with reason they were culled, in culled dataset
# culled.data[culled.data$Analysis == xxxx, "Why_culled"] <- "large between peak standard deviations (st)"     
# culled.data[culled.data$Analysis == xxxx, "Why_culled"] <- "high between peak stdev; big half peak could affect data?) (sa)"
# culled.data[culled.data$Analysis == xxxx, "Why_culled"] <- "too small (sa)"
# culled.data[culled.data$Analysis == xxxx, "Why_culled"] <- "too small; sd ok but smaller than linearity range (sa)"

#add column that tells values used in the scale correction
data$d13C_scale_input <- col_for_scale_C
data$d18O_scale_input <- col_for_scale_O
data$Run <- Run

data <- data %>% mutate(d18O.error.S ,d13C.error.S)
# reorder columns for more readable output in output file
data <- data %>%
  select(Run,row, file_id, Analysis, Identifier1, mass, type, num.peaks, area44, area44.sd, inv.area44, PercentCO3, d13C.raw, d13C.sd, d13C.offset, d13C.drift, d13C.lin, d13C.lindrift, d13C.scale, d13C.error.S, d13C_scale_input, d18O.raw.SMOW, d18O.sd, d18O.raw, d18O.offset, d18O.drift, d18O.lin, d18O.lindrift, d18O.scale,d18O.error.S, d18O_scale_input, Do_not_use, Why_culled)
#creates subset of just samples and fully corrected data.
keydata <- data %>% ungroup() %>%  filter(!(Identifier1 %in% c(corr.std, mon.std, dis.std, dis2.std))) %>% select(Run,Analysis, Identifier1, mass,d13C.scale,d13C.error.S, d18O.scale,d18O.error.S,PercentCO3, Do_not_use, Why_culled)

6.2 save data to spreadsheet

add_ws_with_data <- function(wb, sheet, data) {
  addWorksheet(wb, sheet)
  writeData(wb, sheet=sheet, data)
  return(wb)
}

wb <- createWorkbook("data") 
wb <- add_ws_with_data(wb, "key data", keydata)
wb <- add_ws_with_data(wb, "all data corrected", data)
wb <- add_ws_with_data(wb, "offsetC", offsetC)
wb <- add_ws_with_data(wb, "driftC", driftC)
wb <- add_ws_with_data(wb, "linC", linC)
wb <- add_ws_with_data(wb, "lindriftC", lindriftC)
wb <- add_ws_with_data(wb, "C Stds means", C.stds.table)
wb <- add_ws_with_data(wb, "offsetO", offsetO)
wb <- add_ws_with_data(wb, "driftO", driftO)
wb <- add_ws_with_data(wb, "linO", linO)
wb <- add_ws_with_data(wb, "lindriftO", lindriftO)
wb <- add_ws_with_data(wb, "O Stds means", O.stds.table)
wb <- add_ws_with_data(wb, "all stds used", stds)
wb <- add_ws_with_data(wb, "culled data", culled.data)
saveWorkbook(wb, paste0(session, "_corrected_data.xlsx"), overwrite = TRUE)